import pandas as pd
pd.options.display.float_format ='{:.2f}'.format
expense = pd.read_csv(r"C:\Users\yilin\OneDrive\Desktop\red cross\charitydata-119219814RR0001-expenses.csv", skiprows=1, index_col=0)
expense = expense.iloc[:,::-1]
total_expense = expense.iloc[-1,:].to_frame()
total_expense
| Total expenditures ($) | |
|---|---|
| 2003 | 191334231.00 |
| 2004 | 195510000.00 |
| 2005 | 0.00 |
| 2006 | 283949891.00 |
| 2007 | 257229755.00 |
| 2008 | 299197026.00 |
| 2009 | 415271172.00 |
| 2010 | 366804504.00 |
| 2011 | 419674677.00 |
| 2012 | 386445763.00 |
| 2013 | 306388801.00 |
| 2014 | 282271763.00 |
| 2015 | 310099360.00 |
| 2016 | 313153540.00 |
| 2017 | 516908994 |
| 2018 | 457956426 |
| 2019 | 381758320 |
| 2020 | 347602783 |
| 2021 | 553402595 |
# change data type
total_expense = total_expense['Total expenditures ($)'].astype('int64')
revenue = pd.read_csv(r"C:\Users\yilin\OneDrive\Desktop\red cross\charitydata-119219814RR0001-revenue.csv", skiprows=1, index_col=0)
total_revenue = revenue.iloc[-1,::-1].to_frame()
total_revenue
| Total revenue ($) | |
|---|---|
| 2003 | 191388361.00 |
| 2004 | 197908000.00 |
| 2005 | 485211918.00 |
| 2006 | 362879651.00 |
| 2007 | 230604119.00 |
| 2008 | 253196921.00 |
| 2009 | 325330082.00 |
| 2010 | 449211535.00 |
| 2011 | 403926824.00 |
| 2012 | 349698551.00 |
| 2013 | 255564842.00 |
| 2014 | 328775664.00 |
| 2015 | 275720312.00 |
| 2016 | 314648445.00 |
| 2017 | 612082298 |
| 2018 | 476646615 |
| 2019 | 340074588 |
| 2020 | 336899823 |
| 2021 | 563961036 |
# change data type
total_revenue = total_revenue['Total revenue ($)'].astype('int64')
# data visualization
# define colors
GRAY1, GRAY2, GRAY3 = '#231F20', '#414040', '#555655'
GRAY4, GRAY5, GRAY6 = '#646369', '#76787B', '#828282'
GRAY7, GRAY8, GRAY9 = '#929497', '#A6A6A5', '#BFBEBE'
BLUE1, BLUE2, BLUE3, BLUE4 = '#174A7E', '#4A81BF', '#94B2D7', '#94AFC5'
RED1, RED2 = '#C3514E', '#E6BAB7'
GREEN1, GREEN2 = '#0C8040', '#9ABB59'
ORANGE1 = '#F79747'
import plotly.graph_objects as go
import plotly.express as px
# line plot
fig = go.Figure(data=go.Scatter(x=total_expense.index.values[-5:],
y=total_expense.values[-5:],
mode='lines',
line=dict(color=ORANGE1, width=3),
name='Total Expense'))
fig.add_trace(
go.Scatter(x=total_revenue.index.values[-5:],
y=total_revenue.values[-5:],
mode='lines',
line=dict(color=BLUE1, width=3),
name='Total Revenue'))
# scatter plot
fig.add_trace(
go.Scatter(x=total_revenue.index.values[-3:],
y=total_revenue.values[-3:],
mode='markers+text',
marker=dict(color=BLUE1, size=12),
name='Total Revenue'))
fig.add_trace(
go.Scatter(x=total_expense.index.values[-3:],
y=total_expense.values[-3:],
mode='markers+text',
marker=dict(color=ORANGE1, size=12),
name='Total Expense'))
# vertical line
fig.add_vline(x=2, line_width=2.5, line_color=GRAY1)
# yaxes
fig.update_yaxes(range=(0, 700000000),
title_text='$',
title_standoff=15,
title_font=dict(size=16, family='Arial'),
tickfont=dict(size=16, family='Arial'),
automargin=True)
# annotation
annotation = []
# label lines directly
annotation.append(
dict(xref='paper',
yref='paper',
x=0,
y=0.95,
text='Total Revenue',
font=dict(family='Arial', size=15, color=BLUE1),
showarrow=False))
annotation.append(
dict(
xref='paper',
yref='paper',
x=-0.1,
y=-0.27,
xanchor='left',
yanchor='bottom',
text='Data source: red cross website',
font=dict(family='Arial', size=11.5, color=GRAY3),
showarrow=False,
align='left'))
annotation.append(
dict(xref='paper',
yref='paper',
x=0,
y=0.65,
text='Total Expense',
font=dict(family='Arial', size=15, color=ORANGE1),
showarrow=False))
annotation.append(
dict(xref='paper',
yref='paper',
x=1.12,
y=1.2,
text='<b>From 2019 to 2020, our expense are higher than revenue</b><br>We nearly maintain a balance between revenue and expense in 2021',
font=dict(family='Arial', size=13, color=GRAY5),
align='left',
showarrow=False))
annotation.append(
dict(x=-0.1,
y=1.45,
xref='paper',
yref='paper',
text='<b>Please balance our budget</b>',
font=dict(size=30, color=GRAY3),
showarrow=False))
annotation.append(
dict(x=-0.1,
y=1.28,
xref='paper',
yref='paper',
text='total revenue vs total expense over time',
font=dict(family='Arial', size=18, color=GRAY2),
showarrow=False))
fig.update_layout(template='simple_white',
font=dict(family='Arial', size=15, color='#646369'),
showlegend=False,
margin=dict(t=150, r=100),
height= 500,
width=800,
annotations = annotation,
hovermode="x unified",
hoverlabel=dict(font_size=11))
fig.show()
fig.write_html("total Revenue VS Total Expense.html")
From above line chart, we can clearly see the trend of total revenue and total expense over time in the recent 5 years.
# only include Year 2020 and 2021
expense_2020 = expense.iloc[0:-3, -2:]
expense_2020 = expense_2020.astype('int64')
# exclude 0 expense type
mask = (expense_2020['2021'] == 0) & (expense_2020['2020'] == 0)
expense_2020 = expense_2020[~mask]
# customize expense type name
expense_2020 = expense_2020.rename({'Advertising and promotion ($)': 'Advertising and promotion',
'Travel and vehicle expenses ($)': 'Travel and vehicle expenses',
'Interest and bank charges ($)': 'Interest and bank charges',
'Licences, memberships, and dues ($)': 'Licences, memberships, and dues',
'Office supplies and expenses ($)': 'Office supplies and expenses',
'Occupancy costs ($)': 'Occupancy costs',
'Professional and consulting fees ($)': 'Professional and consulting fees',
'Education and training for staff and volunteers ($)': 'Education and training',
'Amortization of capitalized assets ($)': 'Amortization of capitalized assets',
'Total expenditure on all compensation (enter the amount reported at line 390 in Schedule 3, if applicable) ($)': 'Compensation',
'Fair market value of all donated goods used in charitable activities ($)': 'Donated goods value',
'All other expenditures not included in the amounts above (excluding gifts to qualified donees) ($)': 'Others'})
# visualize the table format
expense_2020 = expense_2020.reset_index().rename(columns={'Expenses': 'Expense Type'}).sort_values(by=['2021', '2020'], ascending=[False, False])
expense_2020.style.format(thousands=',').bar()
| Expense Type | 2020 | 2021 | |
|---|---|---|---|
| 11 | Others | 153,032,220 | 285,125,659 |
| 8 | Compensation | 127,270,040 | 198,166,435 |
| 1 | Travel and vehicle expenses | 18,941,337 | 17,513,991 |
| 6 | Professional and consulting fees | 6,743,859 | 10,652,833 |
| 4 | Office supplies and expenses | 7,188,940 | 9,303,593 |
| 10 | Amortization of capitalized assets | 6,811,807 | 7,161,607 |
| 5 | Occupancy costs | 6,017,281 | 5,365,778 |
| 0 | Advertising and promotion | 844,652 | 2,562,597 |
| 9 | Donated goods value | 2,522,565 | 2,410,956 |
| 2 | Interest and bank charges | 1,542,395 | 1,663,385 |
| 7 | Education and training | 1,026,716 | 1,236,862 |
| 3 | Licences, memberships, and dues | 162,434 | 182,894 |
# unpivot the dataframe
expense_2020_unpivot = expense_2020.melt(id_vars='Expense Type', var_name='Year', value_name='Expense')
expense_2020_unpivot
| Expense Type | Year | Expense | |
|---|---|---|---|
| 0 | Others | 2020 | 153032220 |
| 1 | Compensation | 2020 | 127270040 |
| 2 | Travel and vehicle expenses | 2020 | 18941337 |
| 3 | Professional and consulting fees | 2020 | 6743859 |
| 4 | Office supplies and expenses | 2020 | 7188940 |
| 5 | Amortization of capitalized assets | 2020 | 6811807 |
| 6 | Occupancy costs | 2020 | 6017281 |
| 7 | Advertising and promotion | 2020 | 844652 |
| 8 | Donated goods value | 2020 | 2522565 |
| 9 | Interest and bank charges | 2020 | 1542395 |
| 10 | Education and training | 2020 | 1026716 |
| 11 | Licences, memberships, and dues | 2020 | 162434 |
| 12 | Others | 2021 | 285125659 |
| 13 | Compensation | 2021 | 198166435 |
| 14 | Travel and vehicle expenses | 2021 | 17513991 |
| 15 | Professional and consulting fees | 2021 | 10652833 |
| 16 | Office supplies and expenses | 2021 | 9303593 |
| 17 | Amortization of capitalized assets | 2021 | 7161607 |
| 18 | Occupancy costs | 2021 | 5365778 |
| 19 | Advertising and promotion | 2021 | 2562597 |
| 20 | Donated goods value | 2021 | 2410956 |
| 21 | Interest and bank charges | 2021 | 1663385 |
| 22 | Education and training | 2021 | 1236862 |
| 23 | Licences, memberships, and dues | 2021 | 182894 |
expense_2020_unpivot['Percentage %'] = expense_2020_unpivot['Expense']/expense_2020_unpivot.groupby(['Year'])['Expense'].transform('sum') * 100
expense_2020_unpivot['($) Millions'] = expense_2020_unpivot['Expense']/1000000
expense_2020_unpivot
| Expense Type | Year | Expense | Percentage % | ($) Millions | |
|---|---|---|---|---|---|
| 0 | Others | 2020 | 153032220 | 46.08 | 153.03 |
| 1 | Compensation | 2020 | 127270040 | 38.32 | 127.27 |
| 2 | Travel and vehicle expenses | 2020 | 18941337 | 5.70 | 18.94 |
| 3 | Professional and consulting fees | 2020 | 6743859 | 2.03 | 6.74 |
| 4 | Office supplies and expenses | 2020 | 7188940 | 2.16 | 7.19 |
| 5 | Amortization of capitalized assets | 2020 | 6811807 | 2.05 | 6.81 |
| 6 | Occupancy costs | 2020 | 6017281 | 1.81 | 6.02 |
| 7 | Advertising and promotion | 2020 | 844652 | 0.25 | 0.84 |
| 8 | Donated goods value | 2020 | 2522565 | 0.76 | 2.52 |
| 9 | Interest and bank charges | 2020 | 1542395 | 0.46 | 1.54 |
| 10 | Education and training | 2020 | 1026716 | 0.31 | 1.03 |
| 11 | Licences, memberships, and dues | 2020 | 162434 | 0.05 | 0.16 |
| 12 | Others | 2021 | 285125659 | 52.67 | 285.13 |
| 13 | Compensation | 2021 | 198166435 | 36.61 | 198.17 |
| 14 | Travel and vehicle expenses | 2021 | 17513991 | 3.24 | 17.51 |
| 15 | Professional and consulting fees | 2021 | 10652833 | 1.97 | 10.65 |
| 16 | Office supplies and expenses | 2021 | 9303593 | 1.72 | 9.30 |
| 17 | Amortization of capitalized assets | 2021 | 7161607 | 1.32 | 7.16 |
| 18 | Occupancy costs | 2021 | 5365778 | 0.99 | 5.37 |
| 19 | Advertising and promotion | 2021 | 2562597 | 0.47 | 2.56 |
| 20 | Donated goods value | 2021 | 2410956 | 0.45 | 2.41 |
| 21 | Interest and bank charges | 2021 | 1663385 | 0.31 | 1.66 |
| 22 | Education and training | 2021 | 1236862 | 0.23 | 1.24 |
| 23 | Licences, memberships, and dues | 2021 | 182894 | 0.03 | 0.18 |
# distribution by expense type
fig = px.bar(expense_2020_unpivot,
x='Year',
y='Percentage %',
color='Expense Type',
color_discrete_sequence=[BLUE1, BLUE2] + [GRAY7] * 10,
barmode='stack',
text_auto=True,
hover_data=expense_2020_unpivot.columns,
template="simple_white")
fig.update_layout(xaxis=dict(ticks=""),
yaxis=dict(ticksuffix="%", tickformat=".2f", title=''),
showlegend=False,
width=500,
height=500)
From above 100% stacked bar chart, we can see the distribution of total expenses changes from the year 2020 to 2021.